── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag() masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(sf)
Linking to GEOS 3.12.1, GDAL 3.8.4, PROJ 9.3.1; sf_use_s2() is TRUE
library(mapview)library(ggplot2)library(osmdata)
Warning: package 'osmdata' was built under R version 4.4.1
Data (c) OpenStreetMap contributors, ODbL 1.0. https://www.openstreetmap.org/copyright
Rows: 15096 Columns: 16
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (7): user_id, title, description, tags, url, park, Labels
dbl (8): post_id, lat, lon, accuracy, date_upl, year, Image, cluster
dttm (1): date
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
#Read cluster tags and add to dfdf <-read_xlsx(here("data/cluster tag.xlsx")) %>%select(Cluster, `Proposed Tag`) %>%rename(cluster = Cluster, category =`Proposed Tag`) %>%right_join(df, by ="cluster")#Read georeferenced trails datageoref_trails <-st_read(here("data/national_park_trails_georeferenced/Trails_after_georeferencing_ExportFeatures.shp"))
Reading layer `Trails_after_georeferencing_ExportFeatures' from data source
`D:\Sync\National_Park_Movement_Paper\data\national_park_trails_georeferenced\Trails_after_georeferencing_ExportFeatures.shp'
using driver `ESRI Shapefile'
Simple feature collection with 104 features and 1 field
Geometry type: LINESTRING
Dimension: XYZ
Bounding box: xmin: -13034480 ymin: 6569358 xmax: -12821140 ymax: 6843529
z_range: zmin: 0 zmax: 0
Projected CRS: WGS 84 / Pseudo-Mercator
#Convert to spatial data (assuming WSGS 84 CRS) and reproject to UTM Zone 11Ndf_sf <- df %>%st_as_sf(coords =c("lon", "lat"), crs =st_crs(4326)) %>%st_transform(32611)#Get bounding box for data and convert to polygonobs_bbox <-st_bbox(df_sf)obs_bbox_sf <- obs_bbox %>%st_as_sfc() %>%st_as_sf()#Read national park boundaries for BC and ABbc_parks <-st_read(here("data/national_park_boundaries/british_columbia/CLAB_BC_2023-09-08.shp"))
Reading layer `CLAB_BC_2023-09-08' from data source
`D:\Sync\National_Park_Movement_Paper\data\national_park_boundaries\british_columbia\CLAB_BC_2023-09-08.shp'
using driver `ESRI Shapefile'
Simple feature collection with 7 features and 8 fields
Geometry type: MULTIPOLYGON
Dimension: XY
Bounding box: xmin: -132.1088 ymin: 48.53914 xmax: -115.7991 ymax: 52.8101
Geodetic CRS: NAD83(CSRS)
Reading layer `CLAB_AB_2023-09-08' from data source
`D:\Sync\National_Park_Movement_Paper\data\national_park_boundaries\alberta\CLAB_AB_2023-09-08.shp'
using driver `ESRI Shapefile'
Simple feature collection with 5 features and 8 fields
Geometry type: MULTIPOLYGON
Dimension: XY
Bounding box: xmin: -119.5439 ymin: 48.9975 xmax: -111.017 ymax: 60.69088
Geodetic CRS: NAD83(CSRS)
#Combine park boundaries and reproject to correct CRSparks <-bind_rows(bc_parks, ab_parks) %>%st_transform(st_crs(df_sf)) %>%mutate(NAME_E =str_to_title(NAME_E)) %>%mutate(NAME_E =str_replace_all(NAME_E, " National Park Of Canada", "")) %>%filter(!NAME_E %in%c("Mount Revelstoke", "Glacier"))#Get parks that intersect with bounding boxparks_int <-st_intersection(parks, obs_bbox_sf)
Warning: attribute variables are assumed to be spatially constant throughout
all geometries
#Subset parks that intersect with bounding boxparks <- parks %>%filter(NAME_E %in% parks_int$NAME_E)#Map parksparks %>%ggplot(aes(fill = NAME_E)) +geom_sf() +theme_bw() +guides(fill =guide_legend(title ="Name"))
3 Spatially group observations by cluster
Assign time period for years relative to covid
df_sf <- df_sf %>%#Classify years by pre, peri, postmutate(period =case_when(year ==2019~"pre", year >2019& year <2023~"peri", year ==2023~"post")) %>%mutate(period =factor(period, levels =c("pre", "peri", "post")))
4 Quantify direction of shifts in cluster centroids
Reproduce the figure generated in Liang et al. (2022) that shows the direction of shifts in cluster centroids
Reference: Liang, Yu, et al. “What is the role of disturbance in catalyzing spatial shifts in forest composition and tree species biomass under climate change?.” Global Change Biology 29.4 (2023): 1160-1177.
#Function to derive the distance between two points and the angle between themcompare_dist_n_angle <-function(pt1, pt2){ dist <-st_distance(pt1, pt2) %>%as.numeric() -> dist#Divide by 1000 to convert to km dist <- dist/1000 azimuth <-st_azimuth(pt1, pt2) %>%as.numeric() -> angle# Convert to cartesian coordinates x <- dist *cos(azimuth) y <- dist *sin(azimuth)#Invert the x coordinate since the UTM coordinate system increases eastward x <--x#Combine into df out_df <-data.frame(dist = dist, angle = angle, x = x, y = y)#Assign angle a direction out_df <- out_df %>%mutate(direction =case_when(angle >=0& angle <90~"NE", angle >=90& angle <180~"SE", angle >=180& angle <270~"SW", angle >=270~"NW")) %>%relocate(direction, .after = dist)return(out_df)}#Test for cluster 1 in pre and posttest_pt1 <-df_clus_centroid %>%filter(period =="pre"& cluster ==1)test_pt2 <- df_clus_centroid %>%filter(period =="post"& cluster ==1)#Test shift direction and magnitude for two points pre and posttest_out <-compare_dist_n_angle(test_pt1, test_pt2)shift_plt <- test_out %>%ggplot(aes(x = x, y = y)) +geom_point(color ="red") +xlim(-max(test_out$dist), max(test_out$dist)) +ylim(-max(test_out$dist), max(test_out$dist)) +#Add horizontal and vertical lines at x=0 and y=0geom_hline(yintercept =0, color ="black") +geom_vline(xintercept =0, color ="black") +coord_fixed() +theme_bw() +labs(x ="East-West Centroid Shift (km)", y ="North-South Centroid Shift (km)") +#Remove all gridlinestheme(panel.grid.major =element_blank(), panel.grid.minor =element_blank(),axis.line =element_line(colour ="black"),#Add black oultine to entire plotpanel.border =element_rect(colour ="black", fill =NA, size =1))
Warning: The `size` argument of `element_rect()` is deprecated as of ggplot2 3.4.0.
ℹ Please use the `linewidth` argument instead.
Deleting layer `osm_lines' using driver `GPKG'
Writing layer `osm_lines' to data source
`D:/Sync/National_Park_Movement_Paper/data/osm_data/osm_lines.gpkg' using driver `GPKG'
Writing 32017 features with 10 fields and geometry type Unknown (any).
Create a test area for demonstrating the snapping method
test_dist <-50000test_bbox <- obs_bboxtest_bbox['xmin'] <- obs_bbox['xmax'] - test_disttest_bbox['ymin'] <- obs_bbox['ymin']test_bbox['xmax'] <- obs_bbox['xmax']test_bbox['ymax'] <- obs_bbox['ymin'] + test_disttest_bbox_sf <- test_bbox %>%st_bbox() %>%st_as_sfc() %>%st_as_sf()#View test area in relation to total areaggplot() +geom_sf(data = obs_bbox_sf, fill =NA, col ="black") +geom_sf(data = test_bbox_sf, fill =NA, col ="red") +theme_bw()
print("test area is (size in meters squared")
[1] "test area is (size in meters squared"
print(st_area(test_bbox_sf))
2.5e+09 [m^2]
#Get lines and points within test areatest_lines <- osm_lines %>%st_intersection(test_bbox_sf)
Warning: attribute variables are assumed to be spatially constant throughout
all geometries
Warning: attribute variables are assumed to be spatially constant throughout
all geometries
ggplot() +geom_sf(data = test_bbox_sf, fill =NA, col ="black") +geom_sf(data = test_lines, col ="grey") +geom_sf(data = test_points, col ="red", size =0.9) +theme_bw()
snap_points function based on this post: https://stackoverflow.com/questions/51292952/snap-a-point-to-the-closest-point-on-a-line-segment-using-sf
# remove any object called "c", this breaks the concat fnrm(c)test_points_snap =snap_points(test_points, test_lines, 1000)#Filter the points that did not snaptest_points_snap_fail <- test_points_snap %>%filter(!snapped)test_points_snap_success <- test_points_snap %>%filter(snapped)mapview(test_points, col.regions ="blue") +mapview(test_points_snap_fail, col.regions ="red") +mapview(test_points_snap_success, col.regions ="green") +mapview(test_lines, color ="grey")
print("The mean snap distance is (in meters):")
[1] "The mean snap distance is (in meters):"
mean(test_points_snap$snapped_distance)
[1] 26.80383
To speed up processing time, we can grid the study area and snap points within each grid cell
#Create a grid of study areagrid_size <-50000#50km gridgrid <-st_make_grid(obs_bbox_sf, cellsize = grid_size)#Assign each grid cell an idgrid <- grid %>%st_as_sf() %>%mutate(grid_id =row_number())#Plot gridggplot() +geom_sf(data = obs_bbox_sf, fill =NA, col ="black") +geom_sf(data = grid, fill =NA, col ="red") +theme_bw()
Write function that can process snapping over a grid
snap_points_gridcell <-function(gridcell, points, lines, max_dist, save_dir, verbose=F) {source(utils_fpath)#Fault tolerancetryCatch({#Clip the points points_clip <-st_intersection(points, gridcell) n_points_in_cell <-nrow(points_clip)#Only proceed if there are points in the grid cellif(n_points_in_cell >0){ lines_clip <-st_intersection(lines, gridcell)#Check number of points in grid cellif(verbose){print(paste("There are", n_points_in_cell, "points in grid cell", gridcell$grid_id))}#Apply snapping points_snapped =snap_points(points_clip, lines_clip, max_dist)#Export snapped points fname <-paste0("snapped_points_", gridcell$grid_id, ".gpkg")st_write(points_snapped, file.path(save_dir, fname), append = F) } else {if(verbose){print(paste("No points in grid cell", gridcell$grid_id))} } }, error =function(e){#Print error and tracebackprint(paste("Error with grid cell id:", gridcell$grid_id))print(e)})}#Set vars for snappingsnap_points_outdir <-here("data/snapped_points_in_grid")max_snap_dist <-1000#in meters#Get test grid celltest_cell_id <-12test_cell <- grid[grid$grid_id == test_cell_id,]#Test function on one grid cellsnap_points_gridcell(gridcell=test_cell,points=df_sf, lines=osm_lines, max_dist=max_snap_dist, save_dir=snap_points_outdir,verbose=T )
Warning: attribute variables are assumed to be spatially constant throughout
all geometries
Warning: attribute variables are assumed to be spatially constant throughout
all geometries
[1] "There are 32 points in grid cell 12"
Deleting layer `snapped_points_12' using driver `GPKG'
Writing layer `snapped_points_12' to data source
`D:/Sync/National_Park_Movement_Paper/data/snapped_points_in_grid/snapped_points_12.gpkg' using driver `GPKG'
Writing 32 features with 19 fields and geometry type Point.
#Load and view snapped pointssnapped_points_test_gridcell <-st_read(file.path(snap_points_outdir, paste0("snapped_points_", test_cell_id, ".gpkg")))
Reading layer `snapped_points_12' from data source
`D:\Sync\National_Park_Movement_Paper\data\snapped_points_in_grid\snapped_points_12.gpkg'
using driver `GPKG'
Simple feature collection with 32 features and 19 fields
Geometry type: POINT
Dimension: XY
Bounding box: xmin: 607178.2 ymin: 5666023 xmax: 612955.7 ymax: 5682032
Projected CRS: WGS 84 / UTM zone 11N
snapped_points_test_gridcell_success <- snapped_points_test_gridcell %>%filter(snapped)ggplot() +geom_sf(data = test_cell, fill =NA, col ="black") +geom_sf(data = df_sf, col ="red") +geom_sf(data = snapped_points_test_gridcell_success, col ="green") +geom_sf(data = osm_lines) +#Limit plot to snapped points bboxcoord_sf(xlim =st_bbox(snapped_points_test_gridcell_success)[c("xmin", "xmax")],ylim =st_bbox(snapped_points_test_gridcell_success)[c("ymin", "ymax")]) +theme_bw() +#Add legend to denote sf feature coloursscale_color_manual(values =c("blue", "green", "black")) +labs(color ="Feature type")
Process snapping across grid (sequential processing for now, might upgrade to parallel later…)
Currently takes about 9 minutes to run sequentially on my machine.
RUN_POINT_SNAPPING <-FALSERM_SNAP_PTS_FILES <-FALSEif(RM_SNAP_PTS_FILES){ files_rm =do.call(file.remove, list(list.files(snap_points_outdir, full.names =TRUE)))}if(RUN_POINT_SNAPPING){ start_time <-Sys.time()for(cell_id inunique(grid$grid_id)){print(paste("Processing grid cell", cell_id, "of", nrow(grid), "cells.")) target_cell <- grid[grid$grid_id == cell_id,]snap_points_gridcell(gridcell=target_cell,points=df_sf, lines=osm_lines, max_dist=max_snap_dist, save_dir=snap_points_outdir,verbose=T) }# Calculate elapsed time in seconds end_time <-Sys.time() t_elapsed <-as.numeric(difftime(end_time, start_time, units ="secs"))# Convert elapsed time tominutes and seconds elapsed_minutes <-floor((t_elapsed %%3600) /60) elapsed_seconds <- t_elapsed %%60# Print the elapsed time in hours, minutes, and secondscat("Point snapping processing time:", elapsed_minutes, "minutes,", elapsed_seconds, "seconds.")}
7 Examine snapped points
snap_pts_ls <-lapply(list.files(snap_points_outdir, full.names =TRUE), st_read, quiet = T)snap_pts <-bind_rows(snap_pts_ls)print("Percentage of points that successfully snapped:")
[1] "Percentage of points that successfully snapped:"
hist(snap_pts$snapped_distance, main ="Histogram of snap distances", xlab ="Snap Distance (m)")
#Remove points that did not snap (too far from roads/trails)snap_pts <- snap_pts %>%filter(snapped)#Export all snapped pointsst_write(snap_pts, file.path(here("data"), "snapped_points_all.gpkg"), append = F)
Deleting layer `snapped_points_all' using driver `GPKG'
Writing layer `snapped_points_all' to data source
`D:/Sync/National_Park_Movement_Paper/data/snapped_points_all.gpkg' using driver `GPKG'
Writing 15045 features with 20 fields and geometry type Point.
Summarize individual user observations
#First, check to see if users have multiple years of observationsusers_obs_yrs<- df_sf %>%st_drop_geometry() %>%group_by(user_id) %>%summarise(n_years =length(unique(year))) %>%mutate(multi_year =ifelse(n_years >1, TRUE, FALSE))#Checkprint("How many users have visited the park over multiple years?")
[1] "How many users have visited the park over multiple years?"
print(table(users_obs_yrs$multi_year))
FALSE TRUE
515 84
#If user is multi-year, split into multiple users labeled as user_id_[year]multi_yr_user_ids <- users_obs_yrs %>%filter(multi_year) %>%pull(user_id)#user_id_yr => user ID where unique users are split into multiple users if multi-year visitordf_sf <- df_sf %>%mutate(user_id_yr =if_else(user_id %in% multi_yr_user_ids, paste0(user_id, "_", year), user_id))print("Number of unique users")
[1] "Number of unique users"
length(unique(df_sf$user_id))
[1] 599
print("Number of unique users after splitting multi-year users")
[1] "Number of unique users after splitting multi-year users"
length(unique(df_sf$user_id_yr))
[1] 714
#Group users based on user_id_yrgrouped_sf <- df_sf %>%group_by(user_id_yr) %>%summarise(n_obs =n()) %>%#Remove obs where user only has 1 observationfilter(n_obs >1) %>%st_cast("MULTIPOINT")DT::datatable(grouped_sf)
hist(grouped_sf$n_obs)
print(median(grouped_sf$n_obs))
[1] 7
8 Visualize density of obs on given sections of the network
Classify OSM lines as either roads or trails
trail_nms <-c("footway", "path", "track", "bridleway", "steps", "cycleway", "via_ferrata")#Classify OSM lines into trails and roadsosm_lines <- osm_lines %>%mutate(line_class =if_else(highway %in% trail_nms, "trail", "road"))#Assign line names to those that do not have themosm_lines <- osm_lines %>%mutate(name =ifelse(is.na(name), paste0(highway, "_", osm_id), name))
Get number of flickr images and distinct users each line segment. Use the snapped points to do this with a very small buffer (1m)
#Assign unique id to each line segmentosm_lines <- osm_lines %>%mutate(line_id =row_number())osm_lines_buf <- osm_lines %>%st_buffer(1)#Intersect obs from each year with lineyr_pts_near_line_ls <-list()yr__pts_near_line_clust_ls <-list()for(yr inunique(snap_pts$year)){print(paste("Processing year", yr)) snap_pts_yr <- snap_pts %>%filter(year == yr) pts_near_line <-st_intersection(osm_lines_buf, snap_pts_yr) %>%st_drop_geometry()#Summarize for line segments pts_near_line_summary <- pts_near_line %>%group_by(line_id) %>%summarise(n_obs =n(),n_users =n_distinct(user_id_yr) ) %>%mutate(year = yr) yr_pts_near_line_ls[[as.character(yr)]] <- pts_near_line_summary#Summarize for line segments AND clusters pts_near_line_summary <- pts_near_line %>%group_by(line_id, cluster) %>%summarise(n_obs =n(),n_users =n_distinct(user_id_yr) )%>%mutate(year = yr) yr__pts_near_line_clust_ls[[as.character(yr)]] <- pts_near_line_summary}
[1] "Processing year 2019"
Warning: attribute variables are assumed to be spatially constant throughout
all geometries
`summarise()` has grouped output by 'line_id'. You can override using the
`.groups` argument.
[1] "Processing year 2020"
Warning: attribute variables are assumed to be spatially constant throughout
all geometries
`summarise()` has grouped output by 'line_id'. You can override using the
`.groups` argument.
[1] "Processing year 2021"
Warning: attribute variables are assumed to be spatially constant throughout
all geometries
`summarise()` has grouped output by 'line_id'. You can override using the
`.groups` argument.
[1] "Processing year 2022"
Warning: attribute variables are assumed to be spatially constant throughout
all geometries
`summarise()` has grouped output by 'line_id'. You can override using the
`.groups` argument.
[1] "Processing year 2023"
Warning: attribute variables are assumed to be spatially constant throughout
all geometries
`summarise()` has grouped output by 'line_id'. You can override using the
`.groups` argument.
#Combine df list into single dfpts_near_line_summary <-bind_rows(yr_pts_near_line_ls)pts_near_line_summary_clust <-bind_rows(yr__pts_near_line_clust_ls)DT::datatable(pts_near_line_summary)
#Join to linesosm_lines_d <-left_join(osm_lines, pts_near_line_summary, by ="line_id") %>%mutate(n_users =ifelse(is.na(n_users), 0, n_users),n_obs =ifelse(is.na(n_obs), 0, n_obs))osm_lines_d <- osm_lines_d %>%mutate(traffic_class =case_when(n_users ==0~"none", n_users ==1~"1", n_users ==2~"2", n_users >2~">2")) %>%mutate(traffic_class =factor(traffic_class, levels =c("none", "1", "2", ">2")))osm_lines_d %>%filter(n_users >0) %>%ggplot() +geom_sf(aes(color = traffic_class), lwd =1.2) +facet_wrap(~year) +scale_color_manual(values =c("yellow", "orange", "red")) +theme_bw() +#Set background to dark greytheme(panel.background =element_rect(fill ="grey20"),panel.grid.major =element_blank(), panel.grid.minor =element_blank(),axis.line =element_line(colour ="black"),#Add black oultine to entire plotpanel.border =element_rect(colour ="black", fill =NA, size =1))
Analyze traffic on trails vs. roads
#Summarize traffic on trails vs. roads over yearsosm_lines_d %>%st_drop_geometry() %>%group_by(year, line_class) %>%summarise(n_users =sum(n_users)) %>%ggplot() +geom_point(aes(x = year, y = n_users, color = line_class)) +geom_line(aes(x = year, y = n_users, color = line_class)) +theme_bw() +labs(x ="Year",y ="Number of Users",color ="Route Class")
`summarise()` has grouped output by 'year'. You can override using the
`.groups` argument.
Warning: Removed 2 rows containing missing values or values outside the scale range
(`geom_point()`).
Warning: Removed 2 rows containing missing values or values outside the scale range
(`geom_line()`).
View top roads and trails for each year
#Get the total number of road and trail obs for each yearyr_route_sum <- osm_lines_d %>%st_drop_geometry() %>%group_by(year, line_class) %>%summarise(total_yr_users =sum(n_users))
`summarise()` has grouped output by 'year'. You can override using the
`.groups` argument.
#View the top three roads and trails that had the most traffic across yearstop_routes_df <- osm_lines_d %>%st_drop_geometry() %>%filter(!is.na(name)) %>%group_by(year, line_class, name) %>%summarise(n_users =sum(n_users)) %>%left_join(yr_route_sum, by =c("year", "line_class")) %>%mutate(perc_users =round(n_users/total_yr_users*100, 1)) %>%arrange(year, line_class, desc(perc_users)) %>%slice_max(n_users, n =3)
`summarise()` has grouped output by 'year', 'line_class'. You can override
using the `.groups` argument.
Warning in instance$preRenderHook(instance): It seems your data is too big for
client-side DataTables. You may consider server-side processing:
https://rstudio.github.io/DT/server.html
Warning in instance$preRenderHook(instance): It seems your data is too big for
client-side DataTables. You may consider server-side processing:
https://rstudio.github.io/DT/server.html
9 View change in most frequented road/trail over time
N_TOP_ROUTES <-3#Get top routes that have observations for each yeartop_routes_filt <- osm_lines_d %>%st_drop_geometry() %>%filter(!is.na(name)) %>%group_by(line_class, name, year) %>%summarise(n_users =sum(n_users)) %>%filter(!is.na(year)) %>%pivot_wider(names_from = year, values_from = n_users, values_fill =0) %>%filter(`2019`>0&`2020`>0&`2021`>0&`2022`>0&`2023`>0) %>%pivot_longer(cols =c(`2019`, `2020`, `2021`, `2022`, `2023`), names_to ="year", values_to ="n_users")
`summarise()` has grouped output by 'line_class', 'name'. You can override
using the `.groups` argument.
11 Check out road/trail use density in relation to clusters
#Join to linesosm_lines_d_clst <-left_join(osm_lines, pts_near_line_summary_clust, by ="line_id") %>%mutate(n_users =ifelse(is.na(n_users), 0, n_users),n_obs =ifelse(is.na(n_obs), 0, n_obs))
#View how road and trail use has varied by cluster over timeline_cluster_summary <- osm_lines_d_clst %>%st_drop_geometry() %>%group_by(year, line_class, cluster) %>%summarise(n_users =sum(n_users)) %>%left_join(yr_route_sum, by =c("year", "line_class")) %>%mutate(perc_users =round(n_users/total_yr_users*100, 1)) %>%mutate(cluster =as.factor(cluster))
`summarise()` has grouped output by 'year', 'line_class'. You can override
using the `.groups` argument.
line_clstr_summary_plt <- line_cluster_summary %>%mutate(period =case_when(year ==2019~"pre", year >2019& year <2023~"peri", year ==2023~"post")) %>%mutate(period =factor(period, levels =c("pre", "peri", "post"))) %>%mutate(period =str_to_title(period)) %>%mutate(line_class =paste0(str_to_title(line_class), "s")) %>%filter(!is.na(cluster)) %>%rename(Cluster = cluster) %>%ggplot() +geom_bar(aes(x = period, y = n_users, fill = Cluster), stat ="identity", position ="stack") +facet_wrap(~line_class) +theme_bw() +theme(panel.grid.major =element_blank(), panel.grid.minor =element_blank(),strip.background =element_blank(), strip.text =element_text(face ="bold"),axis.line =element_line(colour ="black"),panel.border =element_rect(colour ="black", fill =NA, size =1)) +labs(x ="Time Period",y ="Number of Users",color ="Cluster")line_clstr_summary_plt +scale_fill_brewer(palette ="Paired")
12 Examine movement of individual users
Get the number of observations each user has
#N obs per useruser_obs <- df_sf %>%st_drop_geometry() %>%group_by(user_id_yr) %>%summarise(n_obs =n()) df_sf <- df_sf %>%left_join(user_obs, by ="user_id_yr")
Check out the travel route of some sample users
Using user_id_yr which is an ID that accounts for users that visited multiple years
CLOSE_PTS_DIST_THRESH <-100#Grab a test user id set.seed(9); test_user_id <- df_sf %>%filter(n_obs >5& n_obs <10) %>%pull(user_id_yr) %>%sample(1)test_user_pts <- df_sf %>%filter(user_id_yr == test_user_id) %>%#Assign points an order id based on the date (oldest - newest)arrange(date) %>%mutate(point_id =paste0("p", row_number()))#Calculate the pairwise distances between all points for userdistances <- test_user_pts %>%st_distance(test_user_pts) %>%drop_units() %>%as.data.frame() %>%mutate(point_id =paste0("p", seq(1, nrow(.)))) %>%setNames(c(paste0("p", seq(1, nrow(.))), "point_id")) %>%pivot_longer(-point_id, names_to ="point_id2", values_to ="dist") %>%filter(point_id != point_id2) %>%mutate(close_pt =ifelse(dist < CLOSE_PTS_DIST_THRESH, TRUE, FALSE)) %>%pivot_wider(names_from = point_id2, values_from = close_pt) %>%#Replace all rows with NA with FALSEmutate_all(~replace(., is.na(.), FALSE))#Convert points to linestring which follow the order based on datetest_user_route <- test_user_pts %>%group_by(user_id_yr) %>%summarise() %>%st_cast("LINESTRING")ggplot() +geom_sf(data = test_user_route, size =0.5, color ="grey") +geom_sf_label(data = test_user_pts, aes(label = user_id_yr), nudge_x =2, nudge_y =2, size =2.5) +theme_bw() +# Remove all gridlinestheme(panel.grid.major =element_blank(), panel.grid.minor =element_blank(),axis.line =element_line(colour ="black"),# Add black outline to entire plotpanel.border =element_rect(colour ="black", fill =NA, size =1))